{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "%pip install poetry\n",
    "%pip install git+https://github.com/oughtinc/ergo.git@f5646b672eb0d60c58e7de850eea5f43a4feaacc\n",
    "%pip install xlrd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext google.colab.data_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "import ergo\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import ssl\n",
    "import warnings\n",
    "import requests\n",
    "from datetime import timedelta, date\n",
    "from ergo.contrib.el_paso import texas_data, onlyasith, krismoore, brachbach, shaman\n",
    "from ergo.contrib.utils import plot_question, question, rejection_sample, sample_from_ensemble, samplers, summarize_question_samples, daterange"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "warnings.filterwarnings(action=\"ignore\", category=FutureWarning)\n",
    "warnings.filterwarnings(module=\"plotnine\", action=\"ignore\")\n",
    "warnings.filterwarnings(module=\"jax\", action=\"ignore\")\n",
    "ssl._create_default_https_context = ssl._create_unverified_context"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "metaculus = ergo.Metaculus(\n",
    "    username=\"oughtpublic\", \n",
    "    password=\"123456\",\n",
    "    api_domain = \"pandemic\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Retrieve external data and models\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Texas government cases data\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "el_paso_cases = texas_data.get_el_paso_data()\n",
    "\n",
    "el_paso_cases"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## @KrisMoore's compiled data\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pulled from\n",
    "[here](https://docs.google.com/spreadsheets/d/1eGF9xYmDmvAkr-dCmd-N4efHzPyYEfVl0YmL9zBvH9Q/edit#gid=1694267458)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compiled_data = krismoore.get_krismoore_data()\n",
    "\n",
    "krismoore.graph_compiled_data(compiled_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## @onlyasith's cases model\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pulled from\n",
    "[here](https://docs.google.com/spreadsheets/d/1L6pzFAEJ6MfnUwt-ea6tetKyvdi0YubnK_70SGm436c/edit#gid=1807978187)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "projected_cases = onlyasith.get_onlyasith_results()\n",
    "\n",
    "projected_cases"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shaman et al. cases model\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pulled from [here](https://github.com/shaman-lab/COVID-19Projection)\n",
    "([paper](https://www.medrxiv.org/content/10.1101/2020.03.21.20040303v2))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cu_projections = shaman.load_cu_projections(\"El Paso County TX\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## @brachbach model: cases -> hospitalized\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_daily_hospital_confirmed = brachbach.get_daily_hospital_confirmed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Access historical data\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_historical_data(date: date, column_name):\n",
    "    \"\"\"\n",
    "    Look up the value of a parameter on a given date\n",
    "    in the historical data we've loaded.\n",
    "\n",
    "    Return the value or raise a KeyError if we don't have it.\n",
    "    \"\"\"\n",
    "\n",
    "    # prefer Texas government data over @KrisMoore compiled data\n",
    "    try: \n",
    "        value = el_paso_cases.loc[date, column_name]\n",
    "        if np.isnan(value):\n",
    "            raise KeyError(f\"value for {column_name} in el_paso_cases is NaN\")\n",
    "    except KeyError:\n",
    "        value = compiled_data.loc[date, column_name]\n",
    "        if np.isnan(value):\n",
    "            raise KeyError(f\"value for {column_name} in compiled_data is NaN\")\n",
    "    return value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model components\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook, we model some aspects of how the COVID-19 pandemic\n",
    "will play out in El Paso. Our goal is to answer the questions in the\n",
    "[El\n",
    "Paso Metaculus series](https://pandemic.metaculus.com/questions/?search=cat:internal--el-paso).\n",
    "\n",
    "In this section, we model some key variables that we'll use to answer\n",
    "the Metaculus questions in the next section. We sometimes ensemble\n",
    "multiple models to try to get a better estimate of a variable.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "START_DATE = date(2020, 4, 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Daily COVID Infections\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Shaman Model\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def cu_model_scenario():\n",
    "    \"\"\"Which of the model scenarios are we in?\"\"\"\n",
    "    return ergo.random_choice([s for s in cu_projections.keys()])\n",
    "\n",
    "@ergo.mem\n",
    "def cu_model_quantile():\n",
    "    \"\"\"Where in the distribution of model outputs are we for this model run?\n",
    "    Want to be consistent across time, so we sample it once per model run\"\"\"\n",
    "    return ergo.uniform()\n",
    "\n",
    "def cu_projection(param: str, date: date) -> int:\n",
    "    \"\"\"\n",
    "    Get the Columbia model's prediction\n",
    "    of the param for the date\n",
    "    \"\"\"\n",
    "    scenario = cu_model_scenario()\n",
    "    quantile = cu_model_quantile()\n",
    "\n",
    "    # Extract quantiles of the model distribution\n",
    "    xs = np.array([0.025, 0.25, 0.5, 0.75, 0.975])\n",
    "    scenario_df = cu_projections[scenario]\n",
    "    param_df = scenario_df[scenario_df[\"var\"] == param]\n",
    "    date_df = param_df[param_df[\"Date\"] == date]\n",
    "    if date_df.empty:\n",
    "        raise KeyError(f\"No Columbia project for param: {param}, date: {date}\")\n",
    "\n",
    "    ys = np.array(date_df[[\"2.5\", \"25\",  \"50\",  \"75\",  \"97.5\"]].iloc[0])\n",
    "    # Linearly interpolate\n",
    "    return int(round(np.interp(quantile, xs, ys)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### @onlyasith model+\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def daily_infections_onlyasith(date: date) -> int:\n",
    "    \"\"\"\n",
    "    What is the number of reported (new) Covid-19 infections on [date]?\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # Look up projections from @onlyasith's model\n",
    "        cases = projected_cases.loc[date, \"New cases\"]\n",
    "        if np.isnan(cases):\n",
    "            raise KeyError\n",
    "\n",
    "        # Add some (fairly arbitrary) uncertainty around this point estimate\n",
    "        if cases == 0:\n",
    "          return cases\n",
    "        cases_estimate = ergo.lognormal_from_interval(cases * 0.8, cases * 1.2)\n",
    "        return int(np.clip(cases_estimate, cases * 0.5, cases * 2).round())\n",
    "    except KeyError:\n",
    "        # We're beyond the time range for data and model\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get historical data or predict using ensemble"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def daily_infections(date: date) -> int:\n",
    "    \"\"\"\n",
    "    What is the number of reported (new) Covid-19 infections on [date]?\n",
    "    \"\"\"\n",
    "    try:\n",
    "        return get_historical_data(date, \"New cases\")\n",
    "    except KeyError:  # if we don't have historical data, use our ensemble of models\n",
    "        return sample_from_ensemble([\n",
    "            lambda date: cu_projection(\"cases\", date), \n",
    "            daily_infections_onlyasith\n",
    "        ], {\"date\": date}, [.8, .2], fallback=True, default=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Daily COVID infections: mean, sma, peak\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def mean_infections(start_date: date, end_date: date):\n",
    "    \"\"\"\n",
    "    What is the average number of reported new infections for this range of \n",
    "    dates? (Including start date, excluding end date)\n",
    "    \"\"\"\n",
    "    days = daterange(start_date, end_date)\n",
    "    return np.mean([daily_infections(day) for day in days])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def sma_infections(date: date):\n",
    "    \"\"\"\n",
    "    The simple moving average of infections for a date.\n",
    "\n",
    "    Defined in https://pandemic.metaculus.com/questions/4128:\n",
    "\n",
    "    'The 2-day SMA is defined as the unweighted average (arithmetic mean)\n",
    "    over the current day and the previous day.'\n",
    "    \"\"\"\n",
    "    return mean_infections(date - timedelta(1), date + timedelta(1))\n",
    "\n",
    "@ergo.mem\n",
    "def peak_compatible_with_historical_data(peak_date):\n",
    "    \"\"\"\n",
    "    Check whether it's possible that some date in the past\n",
    "    was the peak date of new COVID infections in El Paso,\n",
    "    given the historical data we have about COVID infections\n",
    "    \"\"\"\n",
    "    if not peak_date in el_paso_cases.index:\n",
    "        return True\n",
    "    for comparison_date in daterange(START_DATE, peak_date + timedelta(11)):\n",
    "        if comparison_date not in el_paso_cases.index:\n",
    "            continue\n",
    "        if sma_infections(comparison_date) > sma_infections(peak_date):\n",
    "            return False\n",
    "        if sma_infections(comparison_date) == sma_infections(peak_date) and comparison_date > peak_date:\n",
    "            return False\n",
    "    return True\n",
    "\n",
    "\n",
    "@ergo.mem\n",
    "def peak_infection_date_community():\n",
    "    \"\"\"\n",
    "    The community assigns probability to some dates in the past\n",
    "    that we already know were not the peak.\n",
    "    So instead of sampling from the full community distribution,\n",
    "    sample from the portion of the community distribution\n",
    "    that is plausibly correct.\n",
    "    \"\"\"    \n",
    "    peak_date = rejection_sample(\n",
    "        peak_infection_date.question.sample_community, \n",
    "        peak_compatible_with_historical_data)\n",
    "    return peak_date"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Patients with COVID in the hospital"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### @brachbach model+"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "br_confirmed_from_infected_model = get_daily_hospital_confirmed(compiled_data, daily_infections)\n",
    "\n",
    "def brachbach_model_plus(date: date) -> int:\n",
    "    \"\"\"\n",
    "    Get a point estimate from @brachbach's model,\n",
    "    then add some (fairly arbitrary) uncertainty\n",
    "    \"\"\"\n",
    "    cases = br_confirmed_from_infected_model(date)\n",
    "\n",
    "    if cases == 0:\n",
    "      return cases\n",
    "    cases_estimate = ergo.lognormal_from_interval(cases * 0.8, cases * 1.2)\n",
    "    return np.clip(cases_estimate, cases * 0.5, cases * 2)\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get historical data or predict using ensemble"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def hospital_confirmed_for_date(date: date) -> int:\n",
    "    \"\"\"\n",
    "    The total number of lab-confirmed COVID-19 patients in El Paso County in\n",
    "    the hospital on this date\n",
    "    \"\"\"\n",
    "    try:\n",
    "        return get_historical_data(date, \"In hospital confirmed\")\n",
    "    except KeyError:  # if we don't have historical data, use our ensemble of models\n",
    "        return sample_from_ensemble([\n",
    "            lambda date: cu_projection(\"hosp\", date), \n",
    "            brachbach_model_plus\n",
    "        ], {\"date\": date}, [.2, .8], fallback=True, default=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Proportion ICU admissions requiring ventilation\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def frac_icu_ventilation():\n",
    "    \"\"\"\n",
    "    Proportion of ICU admissions requiring ventilation\n",
    "\n",
    "    Approach (PabloStafforini et al): \n",
    "    https://pandemic.metaculus.com/questions/4154/#comment-28155\n",
    "\n",
    "    TODO: \n",
    "    - Improve how we use case data\n",
    "    - Add qualitative adjustments\n",
    "    \"\"\"\n",
    "    ventilation_pseudocounts = 25 + 17 + 0.05 * 1150 + 0.1 * 132\n",
    "    icu_pseudocounts = 100 + 36 + 0.05 * 1300 + 0.1 * 196\n",
    "    return ergo.beta_from_hits(ventilation_pseudocounts, icu_pseudocounts)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# El Paso questions\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4128, community_weight=0.3, community_fn=peak_infection_date_community, start_date=START_DATE)\n",
    "def peak_infection_date() -> date:\n",
    "    \"\"\"\n",
    "    When will El Paso County, Texas, experience its first peak number of COVID\n",
    "    infections?\n",
    "\n",
    "    From https://pandemic.metaculus.com/questions/4128:\n",
    "    'This question resolves as the date for which\n",
    "    the 2-day simple moving average(SMA) of the number of reported new infections\n",
    "    is strictly greater than the 2-day SMA over the subsequent 10 days.'\n",
    "    \"\"\"\n",
    "    end_date = date(2020, 7, 1)\n",
    "    for today in daterange(START_DATE, end_date):\n",
    "        sma_today = sma_infections(today)\n",
    "        future_smas = [sma_infections(today + timedelta(i)) for i in range(1,11)]\n",
    "        if sma_today > max(future_smas):\n",
    "            return today\n",
    "    return end_date\n",
    "\n",
    "plot_question(peak_infection_date, start_date=START_DATE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4137, community_weight=0.5)\n",
    "def peak_infections():\n",
    "    \"\"\"\n",
    "    How many new infections will be reported in El Paso on the day on which\n",
    "    the number of new reported infections peaks?\n",
    "    \"\"\"\n",
    "    peak = peak_infection_date()\n",
    "    return daily_infections(peak)\n",
    "plot_question(peak_infections)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4152, community_weight=0.5)\n",
    "def mean_infections_peak345():\n",
    "    \"\"\"\n",
    "    What will the average number of reported daily infections be in El Paso,\n",
    "    over the 3rd, 4th and 5th days after the first \"peak\"?\n",
    "    \"\"\"\n",
    "    peak = peak_infection_date()\n",
    "    return mean_infections(peak + timedelta(3), peak + timedelta(6))\n",
    "plot_question(mean_infections_peak345)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4170, community_weight=0.8)\n",
    "def mean_infections_peak678():\n",
    "    \"\"\"\n",
    "    What will the average number of reported daily infections be in El Paso,\n",
    "    over the 6th, 7th and 8th days after the first \"peak\"?  \n",
    "    \"\"\"\n",
    "    peak = peak_infection_date()\n",
    "    return mean_infections(peak + timedelta(6), peak + timedelta(9))\n",
    "plot_question(mean_infections_peak678)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4155, community_weight=0.7)\n",
    "def frac_patients_icu():\n",
    "    \"\"\"\n",
    "    What portion of in-hospital cases in El Paso County will require admission\n",
    "    to the ICU?\n",
    "\n",
    "    Following @katifish's approach:\n",
    "    https://pandemic.metaculus.com/questions/4155/#comment-28054\n",
    "\n",
    "    TODO: Add others from katifish comment\n",
    "    \"\"\"\n",
    "    alpha = 0.1 # Rescaling counts becase we're more uncertain than implied by counts\n",
    "    return ergo.random_choice([\n",
    "      ergo.beta_from_hits(alpha * 121, alpha * 508),\n",
    "      ergo.beta_from_hits(alpha * 181, alpha * 507),\n",
    "    ])\n",
    "plot_question(frac_patients_icu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4154, community_weight=0.3)\n",
    "def frac_patients_invasive():\n",
    "    \"\"\"\n",
    "    What portion of in-hospital patients with Covid-19 in El Paso County will\n",
    "    require invasive ventilation?\n",
    "\n",
    "    Following @PabloStafforini's indirect estimation approach:\n",
    "    https://pandemic.metaculus.com/questions/4154/#comment-28155\n",
    "\n",
    "    TODO:\n",
    "    - Combine with direct estimate\n",
    "      direct_estimate = ergo.beta_from_hits(0.1 * 130, 0.1 * 393)\n",
    "    \"\"\"\n",
    "    return frac_patients_icu() * frac_icu_ventilation()\n",
    "plot_question(frac_patients_invasive)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@ergo.mem\n",
    "def peak_hospitalized_date():\n",
    "    \"\"\"\n",
    "    What will be the date when there are the max number of COVID patients in the hospital\n",
    "    within 15 days before or after the date of the first peak in confirmed cases?\n",
    "    \"\"\"\n",
    "    infection_peak_date = peak_infection_date()\n",
    "    days = list(daterange(infection_peak_date - timedelta(15), infection_peak_date + timedelta(16)))\n",
    "\n",
    "    hospitalization_peak_date = days[0]\n",
    "    hospitalized_peak = 0\n",
    "\n",
    "    for day in days:\n",
    "        hospitalized_for_day = hospital_confirmed_for_date(day)\n",
    "\n",
    "        # if there are 2 different dates\n",
    "        # with the same peak number of hospitalized patients,\n",
    "        # return the first date:\n",
    "        # https://pandemic.metaculus.com/questions/4204#comment-30023\n",
    "        if hospitalized_for_day > hospitalized_peak:\n",
    "            hospitalization_peak_date = day\n",
    "            hospitalized_peak = hospitalized_for_day\n",
    "    \n",
    "    return hospitalization_peak_date\n",
    "\n",
    "@question(metaculus, 4153, community_weight=0.3)\n",
    "def max_30d_hospital_confirmed_for_peak():\n",
    "    \"\"\"\n",
    "    What will the maximum number of in-hospital lab-confirmed COVID-19 \n",
    "    patients in El Paso County, in the 30-day period during which the \"peak\"\n",
    "    occurs?\n",
    "    \"\"\"\n",
    "    return hospital_confirmed_for_date(peak_hospitalized_date())\n",
    "\n",
    "plot_question(max_30d_hospital_confirmed_for_peak, bw=0.01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4204)\n",
    "def peak_icu_patients():\n",
    "    \"\"\"\n",
    "    How many patients with Covid-19 in El Paso County will be in the\n",
    "    ICU on the day when the number of hospital admissions of cases peak? \n",
    "    \"\"\"\n",
    "    peak_date = peak_hospitalized_date()\n",
    "    \n",
    "    try:\n",
    "        return get_historical_data(peak_date, \"in_icu\")\n",
    "    # if we don't have historical data, sample from the Columbia model or from the community\n",
    "    except KeyError:\n",
    "        return sample_from_ensemble([\n",
    "            lambda date: cu_projection(\"ICU\", date), \n",
    "            lambda date: peak_icu_patients.question.sample_community()\n",
    "        ], {\"date\": peak_date}, [.65, .35], fallback=True)\n",
    "plot_question(peak_icu_patients, bw=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@question(metaculus, 4201)\n",
    "def peak_invasive_ventilation():\n",
    "    \"\"\"\n",
    "    How many patients with Covid-19 in El Paso County will be on invasive \n",
    "    ventilation on the day when the number of hospital admissions of cases \n",
    "    peak?\n",
    "    \"\"\"\n",
    "    peak_date = peak_hospitalized_date()\n",
    "\n",
    "    try:\n",
    "        return get_historical_data(peak_hospitalized_date(), \"on_ventilator\")\n",
    "    # if we don't have historical data, sample from the Columbia model or from the community\n",
    "    except KeyError:\n",
    "        return sample_from_ensemble([\n",
    "            lambda date: cu_projection(\"ICU\", date), \n",
    "            lambda date: peak_icu_patients.question.sample_community()\n",
    "        ], {\"date\": peak_date}, [.65, .35], fallback=True)\n",
    "\n",
    "\n",
    "plot_question(peak_invasive_ventilation, bw=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate predictions for all questions\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def model():\n",
    "    for sampler in samplers.values():\n",
    "        sampler()\n",
    "\n",
    "samples = ergo.run(model, num_samples=2000)\n",
    "\n",
    "summarize_question_samples(samples)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare predictions to community\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This takes a while since we're fitting a mixture of logistic\n",
    "distributions to our samples before visualizing (and submitting) them.\n",
    "\n",
    "These may look a little different from the plots for the questions shown\n",
    "above, because:\n",
    "\n",
    "1. we've taken more samples from the distribution\n",
    "2. rather than showing raw samples, we're fitting logistic distributions that we can submit them to metaculus\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "submissions = {}\n",
    "for sampler in samplers.values():\n",
    "    q = sampler.question\n",
    "\n",
    "    q_samples = samples[sampler.__name__]\n",
    "\n",
    "    if q.id == 4128: # Date question: Need to convert back to date from days (https://github.com/oughtinc/ergo/issues/144)\n",
    "        q_samples = np.array([START_DATE + timedelta(s) for s in q_samples])\n",
    "\n",
    "    if q.id in [4201, 4204, 4137, 4152, 4170, 4153]:\n",
    "      # Clip extreme values for questions that we had issues fitting\n",
    "      (sample_min, sample_max) = np.quantile(q_samples, [0.08, 0.94])\n",
    "      q_samples = q_samples[(q_samples >= sample_min) & (q_samples <= sample_max)]\n",
    "\n",
    "    submission = q.get_submission_from_samples(q_samples)\n",
    "    submissions[q] = submission\n",
    "\n",
    "    # the graph for this question will be too zoomed out unless we cut off more of the graph\n",
    "    if q.id == 4153:\n",
    "      q.show_prediction(q_samples, plot_samples=False, plot_fitted=True, show_community=True, percent_kept=0.7)\n",
    "    else:\n",
    "      q.show_prediction(q_samples, plot_samples=False, plot_fitted=True, show_community=True, percent_kept=0.9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Should we submit this to Metaculus? If so, uncomment the following lines:\n",
    "# for q, submission in submissions.items(): \n",
    "#     try:\n",
    "#         print(q.submit(submission))\n",
    "#     except requests.HTTPError as e:\n",
    "#         print(e)"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
